
******************
** Postal code / NUTS3 concodrance table **
******************
run "$P_Main/Stata_Data_Prep_a_ZIP_NUTS3.do"

 
******************
** Generating Demographics Data **
******************

run "$P_Main/Stata_Data_Prep_b_demographics.do"


******************
** Generating Wholesale Price Data **
******************

run "$P_Main/Stata_Data_Prep_c_wholesale_e5.do"

******************
** Generating Weather Data **
******************

run "$P_Main/Stata_Data_Prep_d_weather.do"

******************
** Generating Broadband Data **
******************

run "$P_Main/Stata_Data_Prep_e_broadband_speed.do"

******************
** Generating Data on Prices within the Day **
******************

run "$P_Main/Stata_Data_Prep_f_mean_hourly_prices.do" 

******************
** Generating Data on Price Responsiveness **
******************

run "$P_Main/Stata_Data_Prep_g_price_responsiveness.do" 

******************
** Generating Top 5 Brand Data **
******************

run "$P_Main/Stata_Data_Prep_h_top5_brands.do" 


******************
** Nearest **
******************

run "$P_Main/Stata_Data_Prep_i_nearest.do" 

******************
** Autobahn **
******************

run "$P_Main/Stata_Data_Prep_j_autobahn.do" 




******************
** Input and Merge Data **
******************

** take data of stations
clear
use "$P_Data/Price/Daily_AvgP_WA_7to21_e5.dta", clear 


capture drop index

tostring YMD, replace force
gener YMD2 = YMD
destring YMD2, replace force
gen date=date(YMD, "YMD")
drop YMD
rename YMD2 YMD
format date %td
gener year = year(date)
gener month = month(date)
gener year_month =ym(year,month)

drop if year<2016
drop if year>2018


xtset StID date

replace AvgP = AvgP/1000


** merge in adopters & adoption dates
merge m:1 StID using "$P_Data_Processed/adopter_measures_4week_e5.dta"


gener break_date = avgdate if _merge==3
gener treat_group = 1 if _merge==3
replace break_date = 10^5 if _merge==1
replace treat_group = 0 if missing(treat_group)
drop _merge

capture drop Brand
capture drop Post


****

capture drop week
gener week = week(date)

gener weekly_date = yw(year,week)

gener treat_period = 1 if weekly_date>=break_date
replace treat_period = 0 if weekly_date<break_date


**** 

*** merge in regional wholesale prices

merge m:1 StID using "$P_Data_Processed/wholesale_price_concordance.dta" 
drop if _merge==2
drop _merge

merge m:1 date region using  "$P_Data_Processed/wholesale_price_e5.dta"
drop _merge

**** compute margins
**  taxes
* https://www.eea.europa.eu/data-and-maps/indicators/fuel-prices-and-taxes/assessment-5
gener tax = 0.6545
* add VAT 
** VAT needs to be 19% in Germany
* 

gener margin_wholesale = AvgP/1.19 - mean_w_price 


* drop if avg price is too low... 
keep if AvgP>0


capture drop Name St StNum City Lat Lng nOccur
capture drop ID
capture drop Sig
capture drop Post
capture drop Brand
		
* merging in gas station info *
merge m:1 StID using "$P_Data/GS/gas_station.dta"
keep if _merge==3
drop _merge

drop Name St StNum City Lat Lng nOccur
drop ID


* merging in demographics
* first merge in concordance table
merge m:1 Post using "$P_Data_Processed/NUTS3_Post_concordance_table.dta"
drop if _merge==2
drop _merge
merge m:1 nuts3 year using "$P_Data_Processed/demog.dta"
drop if _merge==2
drop _merge
gener l_gdp = ln(GDP_current)
gener employed_share = employed_pop/tot_pop
gener l_pop = ln(tot_pop)


** merging in weather data **
merge m:1 StID using "$P_Data_Processed/weather_concordance.dta"
drop if _merge==2
drop _merge

merge m:1 sdo_id date using "$P_Data_Processed/weather_data.dta"
drop if _merge==2
drop _merge

* generating monthly avg weather vars 
gegen mean_temp = mean(temperature), by(StID year_month)
gegen sd_temp = sd(temperature), by(StID year_month)
gegen mean_precip = mean(precipitation), by(StID year_month)
gegen sd_precip = sd(precipitation), by(StID year_month)
* generating other monthly variables 
fsort StID year_month date
by StID year_month: gener n_days_active_month = _N


gegen month_treat_period = max(treat_period), by(StID year_month)
gegen mean_price = mean(AvgP), by(StID year_month)
 
gegen mean_wh_margin = mean(margin_wholesale), by(StID year_month)


** labelling variables **

label variable mean_wh_margin "Mean Margin"

label variable mean_price "Mean Price"
** controls **
label variable l_gdp "ln(Regional GDP)"
label variable l_pop "ln(Total Regional Population)"
label variable employed_share "Regional Employment Share (employed/pop)"
label variable pop_density "Regional Population Density (pop/$km^2$)"
label variable med_age "Regional Median Population Age"

label variable mean_temp "Mean Temperature (degrees Celsius)"
label variable sd_temp "Std. Dev. Temperature (degrees Celsius)"
label variable mean_precip "Mean Precipitation (mm)"
label variable sd_precip "Std. Dev. Precipitation (mm)"

save "$P_Data_Processed/daily_station_data_e5_7_21.dta", replace


** collapsing to market data 

gegen min_date_StID = min(date), by(StID year_month)
keep if date==min_date_StID
drop min_date_StID

save "$P_Data_Processed/monthly_station_data_e5.dta", replace


** generating the markets we want to use 

clear 

insheet using "$P_Data/Market/all/20_80.csv"
rename stid StID
by mktid, sort: gener mkt_n_stations = _N
save "$P_Data_Processed/cluster_mkts.dta", replace

********************************************************
** Creating final month/station-level data ** 
********************************************************
*** 
** merging additional data & processing monthly data ***

clear 

use "$P_Data_Processed/monthly_station_data_e5.dta", clear

by year_month Post, sort: gener postal_n_stations = _N
gener postal_n_others = postal_n_stations-1
drop if Post==0

** defining treatment 

capture drop treat
gener treat = month_treat_period*treat_group
label variable treat "Adopter" 

**
** drop outlier observations (top and bottom 1%)
drop if mean_wh_margin<0
drop if mean_wh_margin>0.2
drop if mean_price<1.18
drop if mean_price>1.7
	

sort StID year_month
by StID: gener first_treat_month = year_month if treat==1 & treat[_n-1]==0
sort StID first_treat_month
by StID: replace first_treat_month = first_treat_month[1]
replace first_treat_month = 1000 if missing(first_treat_month)
	
* number of other adopters in your Postal code
by Post year_month, sort: egen n_compet_adopt = total(treat)
replace n_compet_adopt = n_compet_adopt - 1 if treat==1	
	
** generating simple IV - share of adopters in your brand
by Brand year_month, sort: gener n_brand = _N
by Brand year_month: egen n_treat = total(treat)

gener share_others_treated = (n_treat)/(n_brand-1) if treat==0
replace share_others_treated = (n_treat-1)/(n_brand-1) if treat==1

label variable n_brand "N Brand Stations"
label variable share_others_treated "Share Brand Adopters"
label variable postal_n_stations "N Competitors in ZIP"
label variable n_compet_adopt "N Competitors Adopting"

		
merge m:1 StID using "$P_Data_Processed/cluster_mkts.dta"
keep if _merge==3
drop _merge

gener station = 1 
gegen n_act_stations = total(station), by(mktid year_month)

gegen n_mkt_compet_adopt = total(treat), by(year_month mktid)
replace n_mkt_compet_adopt = n_mkt_compet_adopt - 1 if treat==1

capture drop n_months_since_treat 
sort StID year_month
capture drop age 
by StID: gener age = _n
gener n_months_since_treat = year_month - first_treat_month
replace n_months_since_treat=. if n_months_since_treat<-31


label variable treat "Adopter $\times$ post-Adoption"

capture drop treat_m*
	
gener treat_m6month1 = 1 if n_months_since_treat>=-6 & n_months_since_treat<-2  & n_months_since_treat!=.
replace treat_m6month1=0 if missing(treat_m6month1)
	
gener treat_m6month2 = 1 if n_months_since_treat>=-12 & n_months_since_treat<-6  & n_months_since_treat!=.
replace treat_m6month2=0 if missing(treat_m6month2)

gener treat_m6month3 = 1 if  n_months_since_treat<-12  & n_months_since_treat!=.
replace treat_m6month3=0 if missing(treat_m6month3)
	

label variable treat_m6month1 "Adopter $\times$ 1-6 Months pre-Adoption"
label variable treat_m6month2 "Adopter $\times$ 7-12 Months pre-Adoption"
label variable treat_m6month3 "Adopter $\times$ 13+ Months pre-Adoption"
	

capture drop IV_m*

	gener IV_m6month1 = share_others_treated if treat_m6month1==1
	replace IV_m6month1 = 0 if missing(IV_m6month1)
	gener IV_m6month2 = share_others_treated if treat_m6month2==1
	replace IV_m6month2 = 0 if missing(IV_m6month2)
	gener IV_m6month3 = share_others_treated if treat_m6month3==1
	replace IV_m6month3 = 0 if missing(IV_m6month3)

label variable IV_m6month1 "Share Brand Adopters $\times$ 1-6 Months pre-Adoption"
label variable IV_m6month2 "Share Brand Adopters $\times$ 7-12 Months pre-Adoption"
label variable IV_m6month3 "Share Brand Adopters $\times$ 13+ Months pre-Adoption"

*merge in wifi speed 
capture drop nuts2
gener nuts2 = substr(nuts3,1,4)
merge m:1 nuts2 year using "$P_Data_Processed/broadband_speed.dta"
drop if _merge==2
drop _merge
			
label variable at_least_10mbps "At Least 10 Mb/s Internet Available Dummy"


save "$P_Data_Processed/monthly_station_data_e5_merged.dta", replace 

**********************************************************************
** Creating final month/market-level duopoly/triopoly data ** 
**********************************************************************

use "$P_Data_Processed/daily_station_data_e5_7_21.dta", clear
			
merge m:1 StID using "$P_Data_Processed/cluster_mkts.dta"
keep if _merge==3
drop _merge

drop if Post==0

merge 1:1 StID date using "$P_Data_Processed/daily_price_change_cluster_5min.dta"
rename DD DD_5min
rename DU DU_5min
rename UD UD_5min
rename UU UU_5min
rename Rival Rival_5min
rename URival DRival_5min
rename DRival Urival_5min
drop if _merge==2
drop _merge
	
gegen daily_market_price = mean(AvgP), by(mktid date)
gegen daily_market_wh_margin = mean(margin_wholesale), by(mktid date)
	
gegen market_mean_wh_margin = mean(daily_market_wh_margin), by(mktid year_month)
	
gegen market_mean_price = mean(daily_market_price), by(mktid year_month)
	
** Price Responses: 

gener DD_5min_share = DD_5min/(DD+DU)
gener UU_5min_share = UU_5min/(UU+UD)
gegen mean_DD_5min_share = mean(DD_5min_share), by(mktid year_month)
gegen mean_UU_5min_share = mean(UU_5min_share), by(mktid year_month)
	
by StID year_month, sort: keep if _n==1 
	

by year_month Post, sort: gener postal_n_stations = _N
gener postal_n_others = postal_n_stations-1
drop if Post==0

capture drop treat
gener treat = month_treat_period*treat_group

* dropping outlier observations (top 1 and bottom 1%)
drop if mean_price<0
drop if market_mean_wh_margin<0 
drop if market_mean_price<1.2
drop if market_mean_price>1.6

sort StID year_month
by StID: gener first_treat_month = year_month if treat==1 & treat[_n-1]==0
sort StID first_treat_month
by StID: replace first_treat_month = first_treat_month[1]
replace first_treat_month = 1000 if missing(first_treat_month)


** generating simple IV - share of adopters in your brand
by Brand year_month, sort: gener n_brand = _N
by Brand year_month: egen n_treat = total(treat)

gener share_others_treated = (n_treat)/(n_brand-1) if treat==0
replace share_others_treated = (n_treat-1)/(n_brand-1) if treat==1
			
	
** keeping 2 or 3 station markets 
by mktid year_month, sort: gener n_act_stations = _N
	
keep if n_act_stations==2 | n_act_stations==3 	
sort mktid year_month StID
by mktid year_month : gen id=_n
	
capture drop T1
capture drop T2
capture drop IV1
capture drop IV2
	
* calculating market level treatment effects / instruments
by mktid year_month: gen T1 = treat[1]*(1-treat[2])+treat[2]*(1-treat[1]) if n_act_stations==2
by mktid year_month: replace T1 = treat[1]*treat[2]*(1-treat[3]) + treat[1]*treat[3]*(1-treat[2]) + treat[2]*treat[3]*(1-treat[1]) + treat[1]*(1-treat[2])*(1-treat[3])+treat[2]*(1-treat[1])*(1-treat[3]) +treat[3]*(1-treat[1])*(1-treat[2]) if n_act_stations==3
								
by mktid year_month: gen T2 = treat[1]*treat[2] if n_act_stations==2
by mktid year_month: replace T2 = treat[1]*treat[2]*treat[3] if n_act_stations==3
	
		
label variable T1 "Not all Stations Adopted"
label variable T2 "All Stations Adopted"
label variable market_mean_wh_margin "Mean Market Wholesale Margin"
label variable market_mean_price "Mean Market Price"

by mktid year_month: gen IV1 = share_others_treated[1]*(1-share_others_treated[2])+share_others_treated[2]*(1-share_others_treated[1]) if n_act_stations==2
by mktid year_month: replace IV1 = share_others_treated[1]*(1-share_others_treated[2])*(1-share_others_treated[3])+share_others_treated[2]*(1-share_others_treated[1])*(1-share_others_treated[3]) + share_others_treated[3]*(1-share_others_treated[1])*(1-share_others_treated[2])  ///
									+  share_others_treated[1]*share_others_treated[2]*(1-share_others_treated[3]) +  share_others_treated[1]*share_others_treated[3]*(1-share_others_treated[2]) +  share_others_treated[2]*share_others_treated[3]*(1-share_others_treated[1]) ///
									 if n_act_stations==3
	
by mktid year_month: gen IV2 = share_others_treated[1]*share_others_treated[2] if n_act_stations==2
by mktid year_month: replace IV2 = share_others_treated[1]*share_others_treated[2]*share_others_treated[3]  if n_act_stations==3
		
by mktid year_month: gen n_brand1 = n_brand[1]
by mktid year_month: gen n_brand2 = n_brand[2]
by mktid year_month: gen n_brand3 = n_brand[3]
replace n_brand3 = 0 if n_act_stations==2
	
by mktid year_month: gener postal_n_others1 = postal_n_others[1]
by mktid year_month: gener postal_n_others2 = postal_n_others[2]
by mktid year_month: gener postal_n_others3 = postal_n_others[3]
replace postal_n_others3 = 0 if n_act_stations==2

by mktid year_month: gener use_mkt= 1 if _n==1

* for timing tests * 
sort StID year_month
by StID : gener first_T1_month = year_month if T1==1 & T1[_n-1]==0 
by StID: gener first_T2_month = year_month if T2==1 & T2[_n-1]==0 

sort StID first_T1_month
by StID: replace first_T1_month = first_T1_month[1]

sort StID first_T2_month
by StID: replace first_T2_month = first_T2_month[1]
	
	
capture drop n_months_since_T1
gener n_months_since_T1 = year_month - first_T1_month 
	
capture drop n_months_since_T2
gener n_months_since_T2 = year_month - first_T2_month 	

	
capture drop T1_6month*
capture drop T2_6month*
capture drop IV1_6month*
capture drop IV2_6month*

gener T1_6month1 = 1 if n_months_since_T1>=0 & n_months_since_T1<6
replace T1_6month1=0 if missing(T1_6month1)
gener T1_6month2 = 1 if n_months_since_T1>=6 & n_months_since_T1<12
replace T1_6month2=0 if missing(T1_6month2)
gener T1_6month3 = 1 if n_months_since_T1>=12 
replace T1_6month3=0 if missing(T1_6month3)

gener T2_6month1 = 1 if n_months_since_T2>=0 & n_months_since_T2<6
replace T2_6month1=0 if missing(T2_6month1)
gener T2_6month2 = 1 if n_months_since_T2>=6 & n_months_since_T2<12
replace T2_6month2=0 if missing(T2_6month2)
gener T2_6month3 = 1 if n_months_since_T2>=12 
replace T2_6month3=0 if missing(T2_6month3)

gener IV1_6month1 = IV1 if T1_6month1==1
replace IV1_6month1 = 0 if missing(IV1_6month1)
gener IV1_6month2 = IV1 if T1_6month2==1
replace IV1_6month2 = 0 if missing(IV1_6month2)
gener IV1_6month3 = IV1 if T1_6month3==1
replace IV1_6month3 = 0 if missing(IV1_6month3)

gener IV2_6month1 = IV2 if T2_6month1==1
replace IV2_6month1 = 0 if missing(IV2_6month1)
gener IV2_6month2 = IV2 if T2_6month2==1
replace IV2_6month2 = 0 if missing(IV2_6month2)
gener IV2_6month3 = IV2 if T2_6month3==1
replace IV2_6month3 = 0 if missing(IV2_6month3)
	
quietly 	ivreghdfe market_mean_price l_gdp pop_density med_age employed_share l_pop   mean_temp sd_temp mean_precip sd_precip n_brand1 n_brand2 n_brand3 (T1 T2 = IV1 IV2) if  use_mkt==1 , absorb(mktid year_month)  cluster(mktid) 

capture drop sample
gener sample = e(sample)
	
label variable T1_6month1 "1-6 months since at Least One Station Adopted"
label variable T1_6month2 "7-12 months since at Least One Station Adopted"
label variable T1_6month3 "12+ months since at Least One Station Adopted"
label variable T2_6month1 "1-6 months since All Stations Adopted"
label variable T2_6month2 "7-12 months since All Stations Adopted"
label variable T2_6month3 "12+ months since All Stations Adopted"

save "$P_Data_Processed/monthly_duo_trio_mkt_data_e5_merged.dta", replace 
